#Importing Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn
import datetime
%matplotlib inline
# Load the dataset from the CSV file into a DataFrame.
df = pd.read_csv("Table2014_2023F.csv")

# Columns that are forced to a numeric dtype below. The names must match the
# CSV header exactly (including the "Embbeded" spelling used by the data).
columns_to_convert = [
    'Solar generation (MW)',
    'Embbeded solar generation (MW)',
    'Embbeded wind generation (MW)',
    'National demand (MW)',
    'Total system demand (MW)',
    'Market Index Price (£/MWh)',
    'Market Index Volume (MWh)',
    'Wind generation (MW)',
    'Hydro generation (MW)',
    'Biomass generation (MW)',
    'System Buy Price(GBP/MWh)',
]

# Convert each listed column to float32. errors='coerce' turns any value that
# cannot be parsed as a number into NaN instead of raising.
for column in columns_to_convert:
    df[column] = pd.to_numeric(df[column], errors='coerce').astype('float32')

# Summary statistics of the numeric columns (displayed as the cell output).
df.describe()
| | System Buy Price(GBP/MWh) | Market Index Price (£/MWh) | Market Index Volume (MWh) | Settlement Period | National demand (MW) | Embbeded wind generation (MW) | Embbeded solar generation (MW) | Total system demand (MW) | Wind generation (MW) | Solar generation (MW) | Hydro generation (MW) | Biomass generation (MW) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 157338.000000 | 157338.000000 | 157338.000000 | 157338.000000 | 157338.000000 | 157338.000000 | 157338.000000 | 157338.000000 | 157338.000000 | 157338.000000 | 157338.000000 | 157338.000000 |
| mean | 70.108925 | 67.942780 | 872.352844 | 24.499123 | 29932.853516 | 1501.433960 | 1175.537231 | 31373.070312 | 5688.662109 | 1157.449951 | 418.494019 | 1110.033325 |
| std | 89.361053 | 70.670906 | 479.337952 | 13.852979 | 7182.784180 | 972.260681 | 1869.287964 | 6932.190430 | 4067.088379 | 1870.588501 | 254.493698 | 1077.196167 |
| min | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 13367.000000 | 83.000000 | 0.000000 | 16629.000000 | 103.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 32.939999 | 35.522500 | 504.962509 | 12.000000 | 24318.000000 | 731.000000 | 0.000000 | 26018.000000 | 2432.000000 | 0.000000 | 208.000000 | 0.000000 |
| 50% | 46.116501 | 44.650002 | 784.099976 | 24.000000 | 29218.000000 | 1282.000000 | 23.000000 | 30561.000000 | 4734.000000 | 3.000000 | 386.000000 | 1127.000000 |
| 75% | 70.119001 | 62.567500 | 1157.199951 | 36.000000 | 34847.750000 | 2070.000000 | 1810.000000 | 35899.000000 | 7925.000000 | 1739.000000 | 594.000000 | 2083.000000 |
| max | 4037.800049 | 1983.660034 | 3743.350098 | 48.000000 | 79138.000000 | 5634.000000 | 13060.000000 | 80820.000000 | 20912.000000 | 9892.000000 | 1403.000000 | 3262.000000 |
# Pairwise correlation between the numeric columns of df.
# numeric_only is passed explicitly: relying on the default is deprecated in
# pandas (it emits a FutureWarning) and the default is slated to become False,
# which would make this call fail on non-numeric columns.
corr_matrix = df.corr(numeric_only=True)
C:\Users\paulo\AppData\Local\Temp\ipykernel_3192\1544550014.py:1: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning. corr_matrix=df.corr()
# Plot the correlation matrix as an annotated heatmap on a wide 20x10 figure.
fig, ax = plt.subplots(figsize=(20, 10))
# Draw onto the axes created above, with each cell labelled by its value.
sn.heatmap(corr_matrix, annot=True, ax=ax)
ax.set_title('Correlation Heatmap Table1')
plt.show()
from pandas_profiling import ProfileReport
C:\Users\paulo\AppData\Local\Temp\ipykernel_3192\2274191625.py:1: DeprecationWarning: `import pandas_profiling` is going to be deprecated by April 1st. Please use `import ydata_profiling` instead. from pandas_profiling import ProfileReport
# Earlier variant, kept for reference: ProfileReport(df) with no extra options.
# Build the profiling report for df with explorative=True, then render it as
# notebook widgets.
profile = ProfileReport(df, explorative=True)
profile.to_widgets()
Summarize dataset: 0%| | 0/5 [00:00<?, ?it/s]
C:\Users\paulo\anaconda3\envs\tf\lib\site-packages\multimethod\__init__.py:315: FutureWarning: In a future version, `df.iloc[:, i] = newvals` will attempt to set the values inplace instead of always setting a new array. To retain the old behavior, use either `df[df.columns[i]] = newvals` or, if columns are non-unique, `df.isetitem(i, newvals)` return func(*args, **kwargs)
Generate report structure: 0%| | 0/1 [00:00<?, ?it/s]
Render widgets: 0%| | 0/1 [00:00<?, ?it/s]
VBox(children=(Tab(children=(Tab(children=(GridBox(children=(VBox(children=(GridspecLayout(children=(HTML(valu…
# Bare expression: as the last statement of a notebook cell, this displays the report.
profile